##
## NC -- Creating the Collapsed File
##

rm(list=ls())

# Library
library(readr)
library(dplyr)
library(tidyr)

# Set the Working Directory
setwd("~/Desktop/PolicingTStops/BadApples/Replication/")

#
# Aggregating by Agency
#

# Load in the data
load("Data/NC_Cleaned.RData")

# Cleaning and Subsetting the Data Further
nc = subset(nc, nc$Purpose != 10)

nc$searchoccur = ifelse(is.na(nc$SearchID),0,
                        ifelse(nc$SearchType=="1"|
                                 nc$SearchType=="3",1,NA))

nc$contra2 = ifelse(!is.na(nc$ContrabandID)&nc$searchoccur==0,NA,
                    ifelse(is.na(nc$ContrabandID),0,1))
nc$stop = ifelse(is.na(nc$searchoccur)|
                   is.na(nc$contra2),NA,1)

# Officer Collapse
nc$prob.cause = ifelse(nc$searchoccur==0,0,
                       ifelse(nc$SearchType==1,1,NA))
nc$contra.pc = ifelse(nc$prob.cause==0,0,
                      ifelse(nc$prob.cause==1,1,NA))
nc$stop = ifelse(is.na(nc$prob.cause)|
                   is.na(nc$contra.pc),NA,1)
nc.agency.pc = aggregate(nc[,c("stop","prob.cause","contra.pc")],
                         by=list(nc$AgencyDescription,nc$race2),
                         function(x){sum(x,na.rm=T)})
nc.agency.pc$Group.3 = "PC"
colnames(nc.agency.pc) = c("agency","race","stop","search","contra","type")

nc$consent = ifelse(nc$searchoccur==0,0,
                    ifelse(nc$SearchType==3,1,NA))
nc$contra.consent = ifelse(nc$consent==0,0,
                           ifelse(nc$consent==1,1,NA))
nc$stop = ifelse(is.na(nc$consent)|
                   is.na(nc$contra.consent),NA,1)
nc.agency.consent = aggregate(nc[,c("stop","consent","contra.consent")],
                              by=list(nc$AgencyDescription,nc$race2),
                              function(x){sum(x,na.rm=T)})
nc.agency.consent$Group.3 = "C"
colnames(nc.agency.consent) = c("agency","race","stop","search","contra","type")

nc$stop = ifelse(is.na(nc$prob.cause)|
                   is.na(nc$contra.pc),NA,1)
nc.agency.all = aggregate(nc[,c("stop","searchoccur","contra2")],
                              by=list(nc$AgencyDescription,nc$race2),
                              function(x){sum(x,na.rm=T)})
nc.agency.all$Group.3 = "ALL"
colnames(nc.agency.all) = c("agency","race","stop","search","contra","type")

nc.ag2 = bind_rows(nc.agency.pc,nc.agency.all)

# Saving the Collapse
save(nc.ag2, file="Data/NCAgency_Search.RData")

###
### Analysis
###

# Clearing the Space
rm(list=ls())

# Libraries
library(readr)
library(dplyr)
library(tidyr)
library(rstan)
library(ggplot2)

# Function
basic_model_fit = function(data = NULL, 
                           model_file = "Scripts/fasttt-master/stan_models/model_mixture.stan",
                           iter_n=12000,seed.set=2938407,
                           delta=0.9,max_trees=12){
  require(rstan)
  
  # Agency set up for running Stan model. 
  stan_data = list(N = nrow(data),
                   R = length(unique(data$race)),
                   D = length(unique(data$agency)),
                   r = as.integer(as.factor(data$race)),
                   d = as.integer(as.factor(data$agency)),
                   n = data$stop,
                   s = data$search,
                   h = data$contra)
  
  # Running the model. 
  rstan_options(auto_write = TRUE)
  options(mc.cores = 5)
  fit = stan(model_file, data=stan_data, 
             warmup = round(iter_n*0.25,0),
             iter=iter_n, 
             chains=5, 
             cores=5, 
             thin = ifelse(iter_n>15000,5,1),
             seed = seed.set,
             control = list(adapt_delta = delta, max_treedepth = max_trees, 
                            adapt_engaged = !FALSE))
  
  return(fit)
}

# Load in the collapsed data set. 
load("Data/NCAgency_Search.RData")

# Cleaning the data set, instituting thresholds, and cropping it. 
nc.ag2.thres = pivot_wider(data=nc.ag2,
                           id_cols=c("agency","type"),
                           names_from="race",
                           values_from = c("stop","search","contra"))
nc.ag2.thres$keep_id = paste(nc.ag2.thres$agency,nc.ag2.thres$type,sep="::")
nc.ag2$keep_id = paste(nc.ag2$agency,nc.ag2$type,sep="::")
nc.keep = nc.ag2.thres$keep_id[which(nc.ag2.thres$stop_Black>49&
                                       nc.ag2.thres$stop_White>49&
                                       nc.ag2.thres$search_Black>0&
                                       nc.ag2.thres$search_White>0)]
nc.ag2$include = ifelse(as.character(nc.ag2$keep_id)%in%nc.keep,1,0)

table(nc.ag2$include)

nc.agency.sm = nc.ag2 %>% filter(include==1) %>%
  filter(race=="Black"|race=="White") %>%
  filter(grepl("UNC ",agency)==F & 
           grepl("University ",agency)==F &
           agency != "Warren County Sheriff's Office")

comp.agency = names(table(nc.agency.sm$agency))[table(nc.agency.sm$agency)==4]
comp = nc.agency.sm %>% 
  filter(agency %in%comp.agency)

# Fitting the models,
pc.stan = basic_model_fit(data=comp%>%filter(type=="PC"),delta=0.995)
save(pc.stan, file="Models/NC_PCmodel50.RData")
all.stan = basic_model_fit(data=comp%>%filter(type=="ALL"))
save(all.stan, file="Models/NC_ALLmodel50.RData")

# NC Plots --- All 
set.seed(987)
t_for_trace = sample(1:394,8)
t_names_for_trace = paste("t_i[",t_for_trace,"]",sep="")

png("Figures/NC_Traceplot_Search_PC50.png",1254,769)
traceplot(pc.stan,
          pars=t_names_for_trace,nrow=2)
dev.off()

png("Figures/NC_Density_Search_PC50.png",629,592)
plot(pc.stan, show_density=TRUE, 
     pars=t_names_for_trace, 
     ci_level=0.95, outer_level=0.999)
dev.off()

png("Figures/NC_Traceplot_Search_All50.png",1254,769)
traceplot(all.stan,
          pars=t_names_for_trace,nrow=2)
dev.off()

png("Figures/NC_Density_Search_All50.png",629,592)
plot(all.stan, show_density=TRUE, 
     pars=t_names_for_trace, 
     ci_level=0.95, outer_level=0.999)
dev.off()

# Extracting the summaries
pc.summary = summary(pc.stan)
all.summary = summary(all.stan)

max(pc.summary$summary[,10],na.rm=T)
max(all.summary$summary[,10],na.rm=T)

pc.ti = pc.summary$summary[2:(nrow(comp%>%filter(type=="PC"))+1),]
pc.output = data.frame("fasttt.mean.pc" = pc.ti[,1],
                       "fasttt.lower.pc" = pc.ti[,1]-
                         1.96*pc.ti[,2],
                       "fasttt.upper.pc" = pc.ti[,1]-
                         1.96*pc.ti[,2])

all.ti = all.summary$summary[2:(nrow(comp%>%filter(type=="ALL"))+1),]
all.output = data.frame("fasttt.mean.all" = all.ti[,1],
                        "fasttt.lower.all" = all.ti[,1]-
                          1.96*all.ti[,2],
                        "fasttt.upper.all" = all.ti[,1]-
                          1.96*all.ti[,2])

nc.search = cbind(comp[1:(788/2),c("agency","race")],all.output,pc.output)
nc.agency.wide = tidyr::pivot_wider(data=nc.search,
                                    id_cols="agency",
                                    names_from="race",
                                    values_from = colnames(nc.search)[3:8])

nc.agency.wide = nc.agency.wide%>%
  mutate(trr.pc = fasttt.mean.pc_White/fasttt.mean.pc_Black,
         trr.all = fasttt.mean.all_White/fasttt.mean.all_Black)

# Saving the data set.
save(nc.agency.wide,file="Data/NC_SearchPurpose50_All.RData")

t.test(x=nc.agency.wide$trr.all,
       y=nc.agency.wide$trr.pc,
       paired=T)

t.test(nc.agency.wide$fasttt.mean.all_Black,
       nc.agency.wide$fasttt.mean.pc_Black,
       paired=T)

t.test(nc.agency.wide$fasttt.mean.all_White,
       nc.agency.wide$fasttt.mean.pc_White)


sd(nc.agency.wide$trr.all)
sd(nc.agency.wide$fasttt.mean.all_Black)
sd(nc.agency.wide$fasttt.mean.all_White)
